import logging
import time

import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.impute import SimpleImputer
from sklearn.metrics import accuracy_score
from sklearn.model_selection import ParameterSampler
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

from experiments.utils import load_dataset_safely, seed_everything


def random_search_baseline(dataset_name: str, n_iter: int = 50, random_state: int = 42):
    """Run a random-search baseline over preprocessing and random-forest settings.

    Each iteration draws one preprocessing combination (imputation strategy
    plus optional standard scaling) and one set of ``RandomForestClassifier``
    hyperparameters, fits the resulting pipeline on the training split and
    scores it by accuracy on the validation split.

    Args:
        dataset_name: Name passed to ``load_dataset_safely``.
        n_iter: Number of random configurations to evaluate.
        random_state: Seed passed to ``seed_everything``, the sampling RNG and
            the classifier.

    Returns:
        A dict with three keys:

        * ``"val_score"``: best validation accuracy, or ``0.0`` when no
          configuration could be evaluated.
        * ``"best_config"``: ``str`` of the winning
          ``(imputer, scaler, params)`` tuple, or ``"None"`` when there is no
          winner.
        * ``"time_sec"``: seconds spent searching, measured after the dataset
          has been loaded.

    Raises:
        RuntimeError: If ``load_dataset_safely`` returns no data; the message
            is the one it reported.
    """
    seed_everything(random_state)
    data, msg = load_dataset_safely(dataset_name)
    if data is None:
        raise RuntimeError(msg)

    logger = logging.getLogger(__name__)

    # perf_counter is monotonic, so the reported duration cannot be skewed by
    # wall-clock adjustments during the search.
    start = time.perf_counter()

    imputer_strategies = ["mean", "median", "most_frequent"]
    scaler_choices = [False, True]

    param_space = {
        "clf__n_estimators": [50, 100, 200],
        "clf__max_depth": [None, 5, 10, 20],
        "clf__min_samples_split": [2, 5, 10],
    }

    # Imputer-major ordering with "no scaler" first, so a given seed keeps
    # selecting the same preprocessing combination at each draw.
    preproc_combos = [
        (strategy, use_scaler)
        for strategy in imputer_strategies
        for use_scaler in scaler_choices
    ]

    X_train, y_train = data["X_train"], data["y_train"]
    X_val, y_val = data["X_val"], data["y_val"]

    rng = np.random.RandomState(random_state)

    # None (rather than 0.0) marks "nothing evaluated yet", so a configuration
    # that legitimately scores 0.0 is still recorded as the best one seen.
    best_score = None
    best_cfg = None

    for _ in range(n_iter):
        strategy, use_scaler = preproc_combos[rng.randint(0, len(preproc_combos))]

        # Build fresh estimators for every trial: trials then never share
        # fitted state, and the objects kept in best_cfg are not refitted by
        # a later iteration.
        imputer = SimpleImputer(strategy=strategy)
        scaler = StandardScaler() if use_scaler else None

        steps = [("imputer", imputer)]
        if scaler is not None:
            steps.append(("scaler", scaler))
        steps.append(("clf", RandomForestClassifier(random_state=random_state)))
        pipeline = Pipeline(steps)

        # Draw a single hyperparameter point; passing the shared rng advances
        # the random stream from one trial to the next.
        params = next(iter(ParameterSampler(param_space, n_iter=1, random_state=rng)))
        pipeline.set_params(**params)

        try:
            pipeline.fit(X_train, y_train)
            score = accuracy_score(y_val, pipeline.predict(X_val))
        except Exception:
            # Best-effort search: a failing configuration is skipped, but the
            # failure is logged instead of being silently scored as 0.0.
            logger.warning(
                "Skipping configuration imputer=%r scaler=%r params=%r",
                imputer,
                scaler,
                params,
                exc_info=True,
            )
            continue

        if best_score is None or score > best_score:
            best_score = score
            best_cfg = (imputer, scaler, params)

    duration = time.perf_counter() - start
    return {
        "val_score": 0.0 if best_score is None else best_score,
        "best_config": str(best_cfg),
        "time_sec": duration,
    }


if __name__ == "__main__":
    # Command-line entry point: run the baseline once with the requested
    # dataset and iteration budget, then print the returned summary.
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument("--dataset", default="iris")
    parser.add_argument("--n_iter", type=int, default=50)
    cli_args = parser.parse_args()

    print(random_search_baseline(cli_args.dataset, n_iter=cli_args.n_iter))
